require(stringr)
require(tidyr)
require(dplyr)
require(ltm)
require(mirt)
require(RColorBrewer)
color <- brewer.pal(9, "Set1")
options(scipen = 999)

# read SmartNews House of Representatives bill database
VDF <- read.csv("gian.csv")

# keep bills with clear majority/minority positions
VDF <- subset(VDF, 衆議院審議時会派態度 %in% c("多数", "少数"))
# drop bills with no recorded opposing parties
VDF <- subset(VDF, 衆議院審議時反対会派 != "")
# extract calendar year from Japanese era notation in bill dates
year <- ifelse(grepl("^平成26年", VDF$衆議院審議終了年月日.衆議院審議結果), 2014,
               ifelse(grepl("^平成27年", VDF$衆議院審議終了年月日.衆議院審議結果), 2015,
                      ifelse(grepl("^平成28年", VDF$衆議院審議終了年月日.衆議院審議結果), 2016,
                             ifelse(grepl("^平成29年", VDF$衆議院審議終了年月日.衆議院審議結果), 2017,
                                    ifelse(grepl("^平成30年", VDF$衆議院審議終了年月日.衆議院審議結果), 2018,
                                           ifelse(grepl("^平成31年", VDF$衆議院審議終了年月日.衆議院審議結果), 2019, 
                                                  ifelse(grepl("^令和元年", VDF$衆議院審議終了年月日.衆議院審議結果), 2019, NA)))))))
VDF <- cbind(VDF, year)
VDF <- subset(VDF, ! is.na(year))
# assign unique bill IDs
id <- 1:nrow(VDF)
VDF <- cbind(id, VDF)

# split pro-vote party lists and reshape into tidy format
df_long <- VDF %>%
  mutate(party_pro = str_split(衆議院審議時賛成会派, ";\\s*")) %>% 
  unnest(party_pro)
# list of unique parties appearing in pro-vote records
unique_parties_pro <- unique(df_long$party_pro)
# convert to wide format (presence/absence coded as 1/0)
df_wide <- df_long %>%
  mutate(value = 1) %>%
  pivot_wider(
    id_cols = id,
    names_from = party_pro,
    values_from = value,
    values_fill = list(value = 0)
  )
df_pro <- as.data.frame(df_wide)

# split con-vote party lists and reshape into tidy format
df_long <- VDF %>%
  mutate(party_con = str_split(衆議院審議時反対会派, ";\\s*")) %>% 
  unnest(party_con)
# list of unique parties appearing in con-vote records
unique_parties_con <- unique(df_long$party_con)
# convert to wide format (presence/absence coded as 1/0)
df_wide <- df_long %>%
  mutate(value = 1) %>%
  pivot_wider(
    id_cols = id,
    names_from = party_con,
    values_from = value,
    values_fill = list(value = 0)
  )
df_con <- as.data.frame(df_wide)

# unified list of parties appearing in pro or con votes
unique_parties <- unique(c(unique_parties_pro, unique_parties_con))

# construct party-by-bill vote matrices for each year
for (y in 2014:2019){
  id.y <- subset(VDF, year==y)$id
  df.y <- as.data.frame(matrix(NA, nrow = length(unique_parties), ncol = length(id.y)))
  colnames(df.y) <- id.y
  rownames(df.y) <- unique_parties
  for (i in 1:length(unique_parties)){
    party.i <- unique_parties[i]
    for (k in 1:length(id.y)){
      pro.i.k <- df_pro[df_pro$id==id.y[k], party.i]  # whether party i supported bill k
      con.i.k <- df_con[df_con$id==id.y[k], party.i]  # whether party i opposed bill k
      # code votes as 1 (support), 0 (oppose), NA (not recorded)
      df.y[party.i, k] <- ifelse(pro.i.k==1, 1, 
                                 ifelse(con.i.k==1, 0, NA))
    }
  }
  # drop parties with no recorded votes in the given year
  df.y <- df.y[rowSums(!is.na(df.y)) > 0, , drop = FALSE]
  # exclude Komeito to align with party coverage in the main analysis
  df.y <- df.y[rownames(df.y) != "公明党", ]
  # remove parties with all votes missing (including parties not active in that year)
  df.y <- df.y[rowSums(!is.na(df.y), na.rm = TRUE) > 0, , drop = FALSE]
  # estimate one-dimensional IRT model based on bill voting
  set.seed(12345)
  mod_ltm <- mirt(df.y, 1, itemtype = "2PL", SE = TRUE,
                  technical = list(NCYCLES=10000))
  # extract party ideal points
  theta <- fscores(mod_ltm, method="EAP", full.scores.SE = TRUE)
  # store year-specific IRT results
  position <- theta[, 1]
  df.y <- cbind(position, df.y)
  assign(paste0("IRT_", y), df.y) 
}

# compare party positions with bill-based IRT estimates (2014-2019)
P_results <- read.csv("3-a_party_estimates.csv")
party.v <- c("CDP_HOR", "DPJ_HOR", "JCP_HOR", "JRP_HOR", "JSP_HOR", 
             "LDP_HOR", "PH_HOR", "TPJ_HOR", "Your Party_HOR")
# restrict to parties and years covered by the bill data
P_results <- subset(P_results, party %in% party.v & year >= 2014 & year <= 2019)
IRTscore <- rep(NA, nrow(P_results))
P_results <- cbind(P_results, IRTscore)
for (i in 1:nrow(P_results)){
  party.i <- P_results[i, "party"]
  year.i <- P_results[i, "year"]
  if (party.i == "CDP_HOR") {
    party.vote <- c("立憲民主党・市民クラブ",
                    "立憲民主党・無所属フォーラム",
                    "立憲民主・国民・社保・無所属フォーラム")
  } else if (party.i == "DPJ_HOR") {
    party.vote <- c("民主党・無所属クラブ",
                    "民進党・無所属クラブ",
                    "民主・維新・無所属クラブ")
  } else if (party.i == "JCP_HOR") {
    party.vote <- "日本共産党"
  } else if (party.i == "JRP_HOR") {
    party.vote <- c("日本維新の会",
                    "おおさか維新の会",
                    "維新の党",
                    "民主・維新・無所属クラブ")
  } else if (party.i == "JSP_HOR") {
    party.vote <- "社会民主党・市民連合"
  } else if (party.i == "LDP_HOR") {
    party.vote <- c("自由民主党",
                    "自由民主党・無所属の会")
  } else if (party.i == "PH_HOR") {
    party.vote <- c("希望の党",
                    "希望の党・無所属クラブ")
  } else if (party.i == "TPJ_HOR") {
    party.vote <- c("生活の党",
                    "自由党",
                    "生活の党と山本太郎となかまたち")
  } else if (party.i == "Your Party_HOR") {
    party.vote <- "みんなの党"
  } else {
    party.vote <- NA
  }
  irt.i <- get(paste0("IRT_", year.i))
  irt.position.v <- irt.i[rownames(irt.i) %in% party.vote, ]$position
  # average across multiple caucus labels, if applicable
  P_results[i, "IRTscore"] <- mean(irt.position.v)
}

# plotting parameters by party
p.color <- ifelse(P_results$party == "LDP_HOR", color[1],
                  ifelse(P_results$party == "JSP_HOR", color[2],
                         ifelse(P_results$party == "JCP_HOR", color[4], 
                                ifelse(P_results$party == "DPJ_HOR", color[3], 
                                       "gray30"))))
p.pch <- ifelse(P_results$party == "LDP_HOR", "L",
                ifelse(P_results$party == "JSP_HOR", "S",
                       ifelse(P_results$party == "JCP_HOR", "C", 
                              ifelse(P_results$party == "DPJ_HOR", "d", 
                                     "o"))))

z <- lsfit(P_results$point, P_results$IRTscore, intercept = TRUE)  # fitted regression line
cor.estimate <- round(cor.test(P_results$point, P_results$IRTscore)$estimate, 2)  # correlation coefficient
cor.estimate <- format(cor.estimate, nsmall = 2)

# Figure A2: correlation between party-year estimates and bill-based IRT ideal points (2014-2019)
png("Figure_A2.png", 
    width = 6, height = 6, units = "in", pointsize = 12, res = 1200)
par(mar = c(8, 5, 1, 1))   
plot(P_results$point, P_results$IRTscore, col = p.color, pch = p.pch, cex = 1.5, cex.lab = 1.5, 
     main = "", 
     xlab = "This study\u2019s latent trait estimates", ylab = "Debate-based Ideal Points")
abline(z, col = "grey", lty="dashed", lwd = 3)
legend("topleft", legend = paste0("r = ", cor.estimate), 
       bty = "n", cex = 1.5, x.intersp = 0)
legend((par()$usr[2] + par()$usr[1]) / 2, 
       par()$usr[3] - (par()$usr[4] - par()$usr[3]) / 4.5, 
       legend = c("LDP   ", "JSP/SDP   ", "JCP   ", "DPJ   ", "others   "),
       col = c(color[c(1, 2, 4, 3)], "gray30"),
       pch = c("L", "S", "C", "d", "o"), bty = "n", cex = 1, xjust = 0.5, 
       ncol = 4, xpd = TRUE)
dev.off()
